import os 

# Steps:
# 1 - take the filtered ORFs
# 2 - extract the S288C entries
# 3 - build the GFF lines
# 4 - convert the coordinates


# Read data: filtered ORF table
# ====================================================================
# Keep only the rows of the filtered ORF table that contain 'S288' and
# 'stat=1'; the selection is delegated to grep through a shell pipeline.
file_orf = "../../03synt_intergenic_orf/02_ORF_tables/table_orf_filtcons.txt"
cmd = "grep 'S288' " + file_orf + " | grep 'stat=1'"
# Close the pipe deterministically instead of leaving the popen handle open.
with os.popen(cmd) as pipe:
    grep_output = pipe.read()
# When grep matches nothing its output is empty: dropping empty lines gives
# [] rather than a list holding a single phantom "" record.
list_orfcer = [row for row in grep_output.rstrip().split("\n") if row]


# Input / output locations, relative to the directory the script is run from.
# FASTA file whose '>' header lines are grepped to get the SID coordinates.
fasta_file = "../../02synt_intergenic_fasta/03_allSIDwithrecons/all_SID_notal_rename.fasta"
# Tab-separated ORF table that is read to build the GFF lines.
orf_not = "../../03synt_intergenic_orf/02_ORF_tables/01masking_steps/table_orf_sid_NOTaligned_notRM"
# GFF file written by the last step of the script.
orf_ps = "../../13_TE_Cer/03_annotations/orf_scer_genomic.gff"

# Read the SID coordinates: collect every line of the FASTA file that
# contains '>' (the record headers) through a shell grep.
cmd = "grep '>' " + fasta_file
# Close the pipe deterministically instead of leaving the popen handle open.
with os.popen(cmd) as pipe:
    grep_output = pipe.read()
# An empty grep result must give [] and not [""], so that no phantom empty
# header reaches the ';'-based header parsing.
liste_sid = [header for header in grep_output.rstrip().split("\n") if header]

# Build a dictionary with the SID information.
# key   = fields 0 and 1 of the ';'-separated header, joined by ';', with
#         every '>' removed
# value = fields 2 and 3 joined by ';' (read further down as the chromosome
#         and a "start-..." span)
dico_sid = {}

for sidi_all in liste_sid:
    # An empty grep result is split into [""]: skip blank entries instead of
    # failing on them.
    if not sidi_all:
        continue
    # Split the header once instead of once per field.
    fields = sidi_all.split(";")
    if len(fields) < 4:
        raise ValueError(
            "malformed SID header, expected at least 4 ';'-separated fields: "
            + sidi_all
        )
    infosid = fields[0] + ";" + fields[1]
    coordsid = fields[2] + ";" + fields[3]
    dico_sid[infosid.replace(">", "")] = coordsid


# Read the ORF table and convert the ORF coordinates, given inside the
# syntenic region, into coordinates on the chromosome.
# Build a dictionary: key = ORF name (column index 3), value = GFF line.
dico_genome = {}

with open(orf_not) as orf_table:
    for line in orf_table:
        line = line.rstrip()
        # Skip blank lines (they would break the column indexing below) and
        # '#' comment lines.
        if not line or line.startswith("#"):
            continue

        array = line.split("\t")
        haplo = array[1]
        # Only rows of the S288 haplotype are converted.
        if haplo != "S288":
            continue

        orf_stat = array[2]
        orf_name = array[3]
        # dico_sid is keyed by "<synt name>;<haplotype>".
        synt_id = array[4] + ";" + haplo
        infosynt = dico_sid[synt_id].split(";")
        chrom = infosynt[0]
        # Start of the syntenic region: first number of the "start-..." span.
        startsynt = int(infosynt[1].split("-")[0])

        # ORF start and stop inside the syntenic region, shifted by the
        # region start to obtain positions on the chromosome.
        # NOTE(review): adding the region start directly is only exact if the
        # ORF positions are 0-based offsets; if both values are 1-based a -1
        # correction would be needed - confirm against the table format.
        newstart = int(array[5]) + startsynt
        newstop = int(array[6]) + startsynt
        sensi = array[7]

        cline = [
            chrom,
            "1.annotORF.py",
            orf_name,
            str(newstart),
            str(newstop),
            ".",
            sensi,
            ".",
            "name=" + orf_name + ";stat=" + orf_stat,
        ]
        dico_genome[orf_name] = "\t".join(cline)
                

# Write the GFF file: for every selected ORF row, take its first
# tab-separated field as the ORF name and emit the matching GFF line;
# names without an entry in dico_genome are skipped.
selected_names = (row.split("\t")[0] for row in list_orfcer)
with open(orf_ps, "w") as gff_out:
    gff_out.writelines(
        dico_genome[name] + "\n"
        for name in selected_names
        if name in dico_genome
    )
        
        
        
